{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Building a Playwright Browser Agent\n",
    "\n",
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/llama-index-integrations/tools/llama-index-tools-playwright/examples/playwright_browser_agent.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
    "\n",
    "This tutorial walks through using the tools provided by the [Playwright](https://playwright.dev/) tool spec (`PlaywrightToolSpec`) to allow LLMs to easily navigate and scrape content from the Internet."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Instantiation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install the Playwright tool spec and LlamaIndex into the kernel's environment\n",
    "%pip install llama-index-tools-playwright llama-index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set up an async Playwright browser.\n",
    "# Only async Playwright tools are offered at the moment, to enable more LlamaIndex use cases.\n",
    "\n",
    "# Install the browsers required by Playwright\n",
    "!playwright install\n",
    "\n",
    "# Needed only in Jupyter notebooks, because they already run their own event loop\n",
    "import nest_asyncio\n",
    "\n",
    "nest_asyncio.apply()\n",
    "\n",
    "# Import the tool spec\n",
    "from llama_index.tools.playwright.base import PlaywrightToolSpec\n",
    "\n",
    "# Create a headless browser, then build the tool spec from it\n",
    "browser = await PlaywrightToolSpec.create_async_playwright_browser(headless=True)\n",
    "playwright_tool = PlaywrightToolSpec.from_async_browser(browser)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Testing the playwright tools"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Listing all tools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "click\n",
      "fill\n",
      "get_current_page\n",
      "extract_hyperlinks\n",
      "extract_text\n",
      "get_elements\n",
      "navigate_to\n",
      "navigate_back\n"
     ]
    }
   ],
   "source": [
    "# Expand the tool spec into individual tools and print each tool's name\n",
    "playwright_tool_list = playwright_tool.to_tool_list()\n",
    "for browser_tool in playwright_tool_list:\n",
    "    tool_name = browser_tool.metadata.name\n",
    "    print(tool_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Navigating to the Playwright documentation website"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://playwright.dev/python/docs/intro\n"
     ]
    }
   ],
   "source": [
    "# Open the Playwright Python introduction page\n",
    "docs_url = \"https://playwright.dev/python/docs/intro\"\n",
    "await playwright_tool.navigate_to(docs_url)\n",
    "\n",
    "# Print the current page URL\n",
    "current_url = await playwright_tool.get_current_page()\n",
    "print(current_url)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Extract all hyperlinks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[\"/python/docs/actionability\", \"#introduction\", \"/python/docs/webview2\", \"/python/docs/dialogs\", \"/python/docs/api-testing\", \"/python/docs/navigations\", \"/python/docs/pom\", \"https://www.youtube.com/channel/UC46Zj8pDH5tDosqm1gd7WTg\", \"/python/docs/aria-snapshots\", \"#\", \"/python/docs/trace-viewer\", \"/python/\", \"/python/docs/handles\", \"/python/docs/input\", \"https://pypi.org/project/pytest-playwright/\", \"/python/docs/locators\", \"/dotnet/docs/intro\", \"/python/docs/codegen-intro\", \"/python/docs/auth\", \"/python/docs/browser-contexts\", \"/python/docs/other-locators\", \"https://www.linkedin.com/company/playwrightweb\", \"/python/docs/downloads\", \"https://twitter.com/playwrightweb\", \"/python/docs/intro\", \"/python/docs/intro#running-the-example-test\", \"/python/docs/frames\", \"/python/docs/release-notes\", \"#running-the-example-test\", \"#system-requirements\", \"/python/docs/evaluating\", \"/python/docs/writing-tests\", \"/python/docs/network\", \"/python/docs/screenshots\", \"/docs/intro\", \"/python/docs/videos\", \"/python/docs/languages\", \"https://learn.microsoft.com/en-us/training/modules/build-with-playwright/\", \"/python/docs/emulation\", \"https://dev.to/playwright\", \"https://aka.ms/playwright/discord\", \"/python/docs/test-assertions\", \"#whats-next\", \"/python/docs/api/class-playwright\", \"/python/docs/debug\", \"/java/docs/intro\", \"/python/docs/events\", \"#installing-playwright-pytest\", \"/python/docs/running-tests\", \"/python/docs/pages\", \"/python/docs/browsers\", \"https://github.com/microsoft/playwright-python\", \"/python/community/ambassadors\", \"/python/docs/mock\", \"/python/docs/chrome-extensions\", \"/python/docs/clock\", \"/python/community/learn-videos\", \"/python/docs/library\", \"/python/community/welcome\", \"/python/docs/trace-viewer-intro\", \"#updating-playwright\", \"https://stackoverflow.com/questions/tagged/playwright\", 
\"/python/docs/intro#installing-playwright-pytest\", \"/python/docs/extensibility\", \"#add-example-test\", \"#__docusaurus_skipToContent_fallback\", \"/python/docs/test-runners\", \"/python/docs/codegen\", \"https://anaconda.org/Microsoft/pytest-playwright\", \"/python/docs/ci-intro\", \"/python/community/feature-videos\"]\n"
     ]
    }
   ],
   "source": [
    "# Extract the hyperlinks from the current page, then print them\n",
    "hyperlinks = await playwright_tool.extract_hyperlinks()\n",
    "print(hyperlinks)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Extract all text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Installation | Playwright Python Skip to main content Playwright for Python Docs API Python Python Node.js Java .NET Community Search ⌘ K Getting Started Installation Writing tests Generating tests Running and debugging tests Trace viewer Setting up CI Pytest Plugin Reference Getting started - Library Release notes Guides Actions Auto-waiting API testing Assertions Authentication Browsers Chrome extensions Clock Debugging Tests Dialogs Downloads Emulation Evaluating JavaScript Events Extensibility Frames Handles Isolation Locators Mock APIs Navigations Network Other locators Pages Page object models Screenshots Snapshot testing Test generator Trace viewer Videos WebView2 Integrations Supported languages Getting Started Installation On this page Installation Introduction ​ Playwright was created specifically to accommodate the needs of end-to-end testing. Playwright supports all modern rendering engines including Chromium, WebKit, and Firefox. Test on Windows, Linux, and macOS, locally or on CI, headless or headed with native mobile emulation. The Playwright library can be used as a general purpose browser automation tool, providing a powerful set of APIs to automate web applications, for both sync and async Python. This introduction describes the Playwright Pytest plugin, which is the recommended way to write end-to-end tests. You will learn How to install Playwright Pytest How to run the example test Installing Playwright Pytest ​ Playwright recommends using the official Playwright Pytest plugin to write end-to-end tests. It provides context isolation, running it on multiple browser configurations out of the box. Get started by installing Playwright and running the example test to see it in action. 
PyPI Anaconda Install the Pytest plugin : pip install pytest-playwright Install the Pytest plugin : conda config --add channels conda-forge conda config --add channels microsoft conda install pytest-playwright Install the required browsers: playwright install Add Example Test ​ Create a file that follows the test_ prefix convention, such as test_example.py , inside the current working directory or in a sub-directory with the code below. Make sure your test name also follows the test_ prefix convention. test_example.py import re from playwright . sync_api import Page , expect def test_has_title ( page : Page ) : page . goto ( \"https://playwright.dev/\" ) # Expect a title \"to contain\" a substring. expect ( page ) . to_have_title ( re . compile ( \"Playwright\" ) ) def test_get_started_link ( page : Page ) : page . goto ( \"https://playwright.dev/\" ) # Click the get started link. page . get_by_role ( \"link\" , name = \"Get started\" ) . click ( ) # Expects page to have a heading with the name of Installation. expect ( page . get_by_role ( \"heading\" , name = \"Installation\" ) ) . to_be_visible ( ) Running the Example Test ​ By default tests will be run on chromium. This can be configured via the CLI options . Tests are run in headless mode meaning no browser UI will open up when running the tests. Results of the tests and test logs will be shown in the terminal. pytest Updating Playwright ​ To update Playwright to the latest version run the following command: pip install pytest-playwright playwright -U System requirements ​ Python 3.8 or higher. Windows 10+, Windows Server 2016+ or Windows Subsystem for Linux (WSL). macOS 13 Ventura, or later. Debian 12, Ubuntu 22.04, Ubuntu 24.04, on x86-64 and arm64 architecture. 
What's next ​ Write tests using web first assertions, page fixtures and locators Run single test, multiple tests, headed mode Generate tests with Codegen See a trace of your tests Next Writing tests Introduction Installing Playwright Pytest Add Example Test Running the Example Test Updating Playwright System requirements What's next Learn Getting started Playwright Training Learn Videos Feature Videos Community Stack Overflow Discord Twitter LinkedIn More GitHub YouTube Blog Ambassadors Copyright © 2025 Microsoft\n"
     ]
    }
   ],
   "source": [
    "# Extract the text of the current page, then print it\n",
    "page_text = await playwright_tool.extract_text()\n",
    "print(page_text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get element\n",
    "Get element attributes for navigating to the next page.\n",
    "You can retrieve the selector from Google Chrome DevTools."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{\"innerText\": \"Next\\nWriting tests\"}]\n"
     ]
    }
   ],
   "source": [
    "# CSS selector of the \"Next\" pagination link at the bottom of the page\n",
    "next_link_selector = \"#__docusaurus_skipToContent_fallback > div > div > main > div > div > div.col.docItemCol_VOVn > div > nav > a\"\n",
    "\n",
    "# Fetch only the innerText attribute of the matching element(s)\n",
    "element = await playwright_tool.get_elements(\n",
    "    selector=next_link_selector, attributes=[\"innerText\"]\n",
    ")\n",
    "print(element)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Click\n",
    "Click on the search bar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"Clicked element '#__docusaurus > nav > div.navbar__inner > div.navbar__items.navbar__items--right > div.navbarSearchContainer_Bca1 > button'\""
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# CSS selector of the search button in the navbar\n",
    "search_button_selector = \"#__docusaurus > nav > div.navbar__inner > div.navbar__items.navbar__items--right > div.navbarSearchContainer_Bca1 > button\"\n",
    "\n",
    "# Keep the call as the last expression so its return message is displayed\n",
    "await playwright_tool.click(selector=search_button_selector)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Fill\n",
    "Fill in the search bar with \"Mouse click\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"Filled element '#docsearch-input'\""
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Type the query into the search input; the call's return message is displayed\n",
    "search_query = \"Mouse click\"\n",
    "await playwright_tool.fill(selector=\"#docsearch-input\", value=search_query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Click on the first result; we should be redirected to the Mouse click page."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://playwright.dev/python/docs/input#mouse-click\n"
     ]
    }
   ],
   "source": [
    "# Open the first search hit, then print the URL the browser ended up on\n",
    "await playwright_tool.click(selector=\"#docsearch-hits0-item-0\")\n",
    "landing_url = await playwright_tool.get_current_page()\n",
    "print(landing_url)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using the playwright tool with an agent\n",
    "To get started, you will need an [OpenAI API key](https://platform.openai.com/account/api-keys)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Provide your OpenAI API key (if using OpenAI) without hardcoding it in the notebook.\n",
    "# A key that is already set in the environment is left untouched; otherwise\n",
    "# you are prompted for it, so the secret is never written into the notebook source.\n",
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
    "    os.environ[\"OPENAI_API_KEY\"] = getpass(\"Enter your OpenAI API key: \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import FunctionAgent from the agent workflow module, the same module the\n",
    "# workflow classes used later in this notebook are imported from.\n",
    "from llama_index.core.agent.workflow import FunctionAgent\n",
    "from llama_index.llms.openai import OpenAI\n",
    "\n",
    "# Expand the tool spec into the list of tools handed to the agent\n",
    "playwright_tool_list = playwright_tool.to_tool_list()\n",
    "\n",
    "agent = FunctionAgent(\n",
    "    tools=playwright_tool_list,\n",
    "    llm=OpenAI(model=\"gpt-4o\"),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sam Altman's blog post on productivity offers a comprehensive guide to enhancing personal efficiency and effectiveness. Here are the key points:\n",
      "\n",
      "1. **Compound Growth**: Altman emphasizes the importance of small productivity gains compounded over time, likening it to financial growth.\n",
      "\n",
      "2. **Choosing the Right Work**: The most crucial aspect of productivity is selecting the right tasks. Independent thought and strong personal beliefs are vital.\n",
      "\n",
      "3. **Delegation and Enjoyment**: Delegate tasks based on others' strengths and interests. Avoid work that doesn't interest you, as it hampers morale and productivity.\n",
      "\n",
      "4. **Learning and Collaboration**: Embrace the ability to learn quickly and surround yourself with inspiring, positive people.\n",
      "\n",
      "5. **Prioritization**: Altman uses lists to manage tasks and prioritizes work that builds momentum. He stresses the importance of saying no to non-critical tasks.\n",
      "\n",
      "6. **Time Management**: Avoid unnecessary meetings and optimize your schedule for productivity, leaving room for serendipitous encounters.\n",
      "\n",
      "7. **Physical Well-being**: Sleep, exercise, and nutrition are critical. Altman shares his personal routines, including sleep optimization, exercise, and dietary habits.\n",
      "\n",
      "8. **Workspace and Tools**: A conducive workspace and efficient tools, like custom software and fast typing skills, enhance productivity.\n",
      "\n",
      "9. **Overcommitment**: A slight overcommitment can boost efficiency, but excessive overcommitment is detrimental.\n",
      "\n",
      "10. **Direction Matters**: Productivity is meaningless if directed towards the wrong goals. Focus on what truly matters.\n",
      "\n",
      "Altman concludes by emphasizing the importance of balancing productivity with personal happiness and relationships.\n"
     ]
    }
   ],
   "source": [
    "# Ask the agent to browse to the article and summarize it, then print its reply\n",
    "summary_request = \"Navigate to https://blog.samaltman.com/productivity, extract the text on this page and return a summary of the article.\"\n",
    "agent_response = await agent.run(summary_request)\n",
    "print(agent_response)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using the playwright tool with an agent workflow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.llms.openai import OpenAI\n",
    "from llama_index.core.agent.workflow import AgentWorkflow\n",
    "\n",
    "from llama_index.core.agent.workflow import (\n",
    "    AgentInput,\n",
    "    AgentOutput,\n",
    "    ToolCall,\n",
    "    ToolCallResult,\n",
    "    AgentStream,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OpenAI(model=\"gpt-4o\")\n",
    "\n",
    "# Build a workflow from the Playwright tool list, the LLM and a system prompt\n",
    "workflow = AgentWorkflow.from_tools_or_functions(\n",
    "    playwright_tool_list,\n",
    "    llm=llm,\n",
    "    system_prompt=\"You are a helpful assistant that can do browser automation and data extraction\",\n",
    ")\n",
    "\n",
    "handler = workflow.run(\n",
    "    user_msg=\"Navigate to https://blog.samaltman.com/productivity, extract the text on this page and return a summary of the article.\"\n",
    ")\n",
    "\n",
    "# Iterate over the events streamed by the handler: print each AgentStream delta\n",
    "# and, for every ToolCallResult, the tool name, its kwargs and its output.\n",
    "# The commented-out branches show other event types that can be inspected.\n",
    "async for event in handler.stream_events():\n",
    "    if isinstance(event, AgentStream):\n",
    "        print(event.delta, end=\"\", flush=True)\n",
    "        # print(event.response)  # the current full response\n",
    "        # print(event.raw)  # the raw llm api response\n",
    "        # print(event.current_agent_name)  # the current agent name\n",
    "    # elif isinstance(event, AgentInput):\n",
    "    #     print(event.input)  # the current input messages\n",
    "    #     print(event.current_agent_name)  # the current agent name\n",
    "    # elif isinstance(event, AgentOutput):\n",
    "    #     print(event.response)  # the current full response\n",
    "    #     print(event.tool_calls)  # the selected tool calls, if any\n",
    "    #     print(event.raw)  # the raw llm api response\n",
    "    elif isinstance(event, ToolCallResult):\n",
    "        print(event.tool_name)  # the tool name\n",
    "        print(event.tool_kwargs)  # the tool kwargs\n",
    "        print(event.tool_output)  # the tool output\n",
    "    # elif isinstance(event, ToolCall):\n",
    "    #     print(event.tool_name)  # the tool name\n",
    "    #     print(event.tool_kwargs)  # the tool kwargs"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llama-index-zpEnpL0o-py3.12",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
